update addon
This commit is contained in:
parent
b249ee20d5
commit
f06a17be53
@ -15,15 +15,15 @@ function student(text) {
|
||||
for (var i=0; i<tokens.length; i++) {
|
||||
|
||||
// token is a German stopword
|
||||
if (util.array_contains(util.getGermanStopwords(), tokens[i])){
|
||||
if (util.arrayContains(util.getDeStopwords(), tokens[i])){
|
||||
deWordCount++;
|
||||
}
|
||||
// token is an English stopword
|
||||
if (util.array_contains(util.getEnglishStopwords(), tokens[i])){
|
||||
if (util.arrayContains(util.getEnStopwords(), tokens[i])){
|
||||
enWordCount++;
|
||||
}
|
||||
// token is a French stopword
|
||||
if (util.array_contains(util.getFrenchStopwords(), tokens[i])){
|
||||
if (util.arrayContains(util.getFrStopwords(), tokens[i])){
|
||||
frWordCount++;
|
||||
}
|
||||
|
||||
|
||||
@ -10,4 +10,33 @@ function tokenize(text) {
|
||||
}
|
||||
|
||||
|
||||
/**
 * Report whether `obj` occurs in the array-like `a`.
 *
 * Elements are compared with strict equality (`===`), so no type coercion
 * takes place and `NaN` is never found.
 *
 * @param {Array} a - Collection to scan; indices 0 .. a.length - 1 are read.
 * @param {*} obj - Value to look for.
 * @returns {boolean} true when some element is strictly equal to `obj`,
 *     otherwise false.
 */
function arrayContains(a, obj) {
  var idx = 0;
  while (idx < a.length) {
    if (a[idx] === obj) {
      return true;
    }
    idx += 1;
  }
  return false;
}
|
||||
|
||||
|
||||
|
||||
/**
 * Build the list of German stopwords.
 *
 * A fresh array is created on every call, so callers may modify the result
 * without affecting later calls.
 *
 * @returns {string[]} German stopwords, all lower case.
 */
function getDeStopwords() {
  var stopwords = [
    "aber", "alle", "allem", "allen", "aller", "alles", "als", "also", "am", "an",
    "ander", "andere", "anderem", "anderen", "anderer", "anderes", "anderm", "andern", "anderr", "anders",
    "auch", "auf", "aus", "bei", "bin", "bis", "bist", "da", "damit", "dann",
    "der", "den", "des", "dem", "die", "das", "daß", "derselbe", "derselben", "denselben",
    "desselben", "demselben", "dieselbe", "dieselben", "dasselbe", "dazu", "dein", "deine", "deinem", "deinen",
    "deiner", "deines", "denn", "derer", "dessen", "dich", "dir", "du", "dies", "diese",
    "diesem", "diesen", "dieser", "dieses", "doch", "dort", "durch", "ein", "eine", "einem",
    "einen", "einer", "eines", "einig", "einige", "einigem", "einigen", "einiger", "einiges", "einmal",
    "er", "ihn", "ihm", "es", "etwas", "euer", "eure", "eurem", "euren", "eurer",
    "eures", "für", "gegen", "gewesen", "hab", "habe", "haben", "hat", "hatte", "hatten",
    "hier", "hin", "hinter", "ich", "mich", "mir", "ihr", "ihre", "ihrem", "ihren",
    "ihrer", "ihres", "euch", "im", "in", "indem", "ins", "ist", "jede", "jedem",
    "jeden", "jeder", "jedes", "jene", "jenem", "jenen", "jener", "jenes", "jetzt", "kann",
    "kein", "keine", "keinem", "keinen", "keiner", "keines", "können", "könnte", "machen", "man",
    "manche", "manchem", "manchen", "mancher", "manches", "mein", "meine", "meinem", "meinen", "meiner",
    "meines", "mit", "muss", "musste", "nach", "nicht", "nichts", "noch", "nun", "nur",
    "ob", "oder", "ohne", "sehr", "sein", "seine", "seinem", "seinen", "seiner", "seines",
    "selbst", "sich", "sie", "ihnen", "sind", "so", "solche", "solchem", "solchen", "solcher",
    "solches", "soll", "sollte", "sondern", "sonst", "über", "um", "und", "uns", "unse",
    "unsem", "unsen", "unser", "unses", "unter", "viel", "vom", "von", "vor", "während",
    "war", "waren", "warst", "was", "weg", "weil", "weiter", "welche", "welchem", "welchen",
    "welcher", "welches", "wenn", "werde", "werden", "wie", "wieder", "will", "wir", "wird",
    "wirst", "wo", "wollen", "wollte", "würde", "würden", "zu", "zum", "zur", "zwar",
    "zwischen"
  ];
  return stopwords;
}
|
||||
|
||||
/**
 * Build the list of English stopwords.
 *
 * A fresh array is created on every call, so callers may modify the result
 * without affecting later calls.
 *
 * @returns {string[]} English stopwords, all lower case.
 */
function getEnStopwords() {
  var stopwords = [
    "i", "me", "my", "myself", "we", "our", "ours", "ourselves", "you", "your",
    "yours", "yourself", "yourselves", "he", "him", "his", "himself", "she", "her", "hers",
    "herself", "it", "its", "itself", "they", "them", "their", "theirs", "themselves", "what",
    "which", "who", "whom", "this", "that", "these", "those", "am", "is", "are",
    "was", "were", "be", "been", "being", "have", "has", "had", "having", "do",
    "does", "did", "doing", "a", "an", "the", "and", "but", "if", "or",
    "because", "as", "until", "while", "of", "at", "by", "for", "with", "about",
    "against", "between", "into", "through", "during", "before", "after", "above", "below", "to",
    "from", "up", "down", "in", "out", "on", "off", "over", "under", "again",
    "further", "then", "once", "here", "there", "when", "where", "why", "how", "all",
    "any", "both", "each", "few", "more", "most", "other", "some", "such", "no",
    "nor", "not", "only", "own", "same", "so", "than", "too", "very", "s",
    "t", "can", "will", "just", "don", "should", "now"
  ];
  return stopwords;
}
|
||||
|
||||
/**
 * Build the list of French stopwords.
 *
 * A fresh array is created on every call, so callers may modify the result
 * without affecting later calls.
 *
 * @returns {string[]} French stopwords, all lower case.
 */
function getFrStopwords() {
  var stopwords = [
    "au", "aux", "avec", "ce", "ces", "dans", "de", "des", "du", "elle",
    "en", "et", "eux", "il", "je", "la", "le", "leur", "lui", "ma",
    "mais", "me", "même", "mes", "moi", "mon", "ne", "nos", "notre", "nous",
    "on", "ou", "par", "pas", "pour", "qu", "que", "qui", "sa", "se",
    "ses", "son", "sur", "ta", "te", "tes", "toi", "ton", "tu", "un",
    "une", "vos", "votre", "vous", "c", "d", "j", "l", "à", "m",
    "n", "s", "t", "y", "été", "étée", "étées", "étés", "étant", "étante",
    "étants", "étantes", "suis", "es", "est", "sommes", "êtes", "sont", "serai", "seras",
    "sera", "serons", "serez", "seront", "serais", "serait", "serions", "seriez", "seraient", "étais",
    "était", "étions", "étiez", "étaient", "fus", "fut", "fûmes", "fûtes", "furent", "sois",
    "soit", "soyons", "soyez", "soient", "fusse", "fusses", "fût", "fussions", "fussiez", "fussent",
    "ayant", "ayante", "ayantes", "ayants", "eu", "eue", "eues", "eus", "ai", "as",
    "avons", "avez", "ont", "aurai", "auras", "aura", "aurons", "aurez", "auront", "aurais",
    "aurait", "aurions", "auriez", "auraient", "avais", "avait", "avions", "aviez", "avaient", "eut",
    "eûmes", "eûtes", "eurent", "aie", "aies", "ait", "ayons", "ayez", "aient", "eusse",
    "eusses", "eût", "eussions", "eussiez", "eussent"
  ];
  return stopwords;
}
|
||||
|
||||
|
||||
// Public API of this module.
// arrayContains has to be exported as well: callers reach it through the
// module object (util.arrayContains(...)), exactly like the stopword getters.
exports.arrayContains = arrayContains;
exports.getDeStopwords = getDeStopwords;
exports.getEnStopwords = getEnStopwords;
exports.getFrStopwords = getFrStopwords;

exports.tokenize = tokenize;
|
||||
|
||||
Binary file not shown.
Loading…
x
Reference in New Issue
Block a user