/* ==----------------------------- w2ci.c ------------------------------== */

/*
   Layout of the reply built below:

   <request>
     //<docresponse>

       use parmstring
       or parmstring+=" text=txt1vp+txtvp[\n]"   (if movetxtp)

       <trigrams doc="1">
         coll=trigcoll (cmd, awtfp, parmstring, reply);
       </trigrams>

       <jdlist doc="1" jds="n">
         <jd doc="1" rank="n" f="n" w="n">
         </jd>
       </jdlist>
     //</docresponse>

     <jdlist doc="all" jds="n">
       <jd doc="all" rank="n" f="n" w="n">
       </jd>
     </jdlist>
   </request>
*/

/* Set up the text= documents and run them.
*/

| 30 | if (txtvp) { // if text1= and/or text= |
|---|
| 31 | |
|---|
| 32 | //lines extracted from txtvp (titulos/texto) |
|---|
| 33 | char *p; |
|---|
| 34 | |
|---|
| 35 | //size of titulos |
|---|
| 36 | int left=txtvlen; |
|---|
| 37 | int xocc=0; |
|---|
| 38 | |
|---|
| 39 | //collection used |
|---|
| 40 | int coll = -1; |
|---|
| 41 | WTFUN_XWT *xwtp=NULL; //=awtfp->vwtfp[coll]; // set defered |
|---|
| 42 | |
|---|
| 43 | //freq - reinit jdtable |
|---|
| 44 | hashzero(jdtable,jdmaxprim,jdtabwidth); jdtabentries=0; |
|---|
| 45 | |
|---|
| 46 | //freq - collection (Au set) |
|---|
| 47 | memset(JDCOLFREQ,0x00,jdmaxprim*sizeof(LONGX)); |
|---|
| 48 | memset(JDCOLPESO,0x00,jdmaxprim*sizeof(float)); |
|---|
| 49 | |
|---|
| 50 | |
|---|
| 51 | //header |
|---|
| 52 | sprintf(reply,"<request>\n",0); reply+=strlen(reply); |
|---|
| 53 | |
|---|
| 54 | //docheader |
|---|
| 55 | //sprintf(reply,"<docresponse>\n"); reply+=strlen(reply); |
|---|
| 56 | |
|---|
| 57 | /* force several text= parms formed by Au[1]+Ti[x] |
|---|
| 58 | */ |
|---|
| 59 | for (p=txtvp; left; ) { // for each line of text= |
|---|
| 60 | |
|---|
| 61 | //yes text1=.. |
|---|
| 62 | if (movetxtp) { |
|---|
| 63 | int len; |
|---|
| 64 | char *x=movetxtp; // as loadcoll |
|---|
| 65 | while (isspace(*p)) if (left) { p++; left--; } //strip |
|---|
| 66 | if (!left) break; |
|---|
| 67 | /* new msg: text= */ |
|---|
| 68 | strcpy(parmstring,hdrparmstring); |
|---|
| 69 | *x++=' '; |
|---|
| 70 | for (len=0; left; p++, len++, left--) { // copy 2nd text |
|---|
| 71 | //if (striptext) |
|---|
| 72 | if (*p=='\n' || *p=='\r') break; // until newline |
|---|
| 73 | *x++ = *p; |
|---|
| 74 | } |
|---|
| 75 | if (!len) continue; // skip null lines |
|---|
| 76 | *x='\0'; |
|---|
| 77 | } |
|---|
| 78 | else { |
|---|
| 79 | left=0; // single run |
|---|
| 80 | } |
|---|
| 81 | //if (1) fprintf(stderr,"[ [parmstring=%s] ]\n",parmstring); |
|---|
| 82 | |
|---|
| 83 | /* init AuTi |
|---|
| 84 | */ |
|---|
| 85 | //freq - AuTi subset |
|---|
| 86 | memset(JDDOCFREQ,0x00,jdmaxprim*sizeof(LONGX)); |
|---|
| 87 | memset(JDDOCPESO,0x00,jdmaxprim*sizeof(float)); |
|---|
| 88 | |
|---|
| 89 | /* get similars |
|---|
| 90 | */ |
|---|
| 91 | ++xocc; |
|---|
| 92 | |
|---|
| 93 | sprintf(reply,"<trigrams doc=\"%d\">\n",xocc); reply+=strlen(reply); |
|---|
| 94 | |
|---|
| 95 | coll=trigcoll (cmd, awtfp, parmstring, reply); reply+=strlen(reply); |
|---|
| 96 | |
|---|
| 97 | sprintf(reply,"</trigrams>\n"); reply+=strlen(reply); |
|---|
| 98 | |
|---|
| 99 | if (coll < 0) return coll; |
|---|
| 100 | rc=coll; /* return code */ |
|---|
| 101 | |
|---|
| 102 | /* set current xwtfp |
|---|
| 103 | */ |
|---|
| 104 | xwtp=awtfp->vwtfp[coll]; // set current |
|---|
| 105 | if (cmd >= 3) fprintf(stderr, "+++ coll used=%d\n", coll); |
|---|
| 106 | |
|---|
| 107 | /* show docjdlist |
|---|
| 108 | */ |
|---|
| 109 | if (jk_n && jk_tabcats[0]/*joinjd*/) { |
|---|
| 110 | |
|---|
| 111 | char *batchp=awtdatabuff; |
|---|
| 112 | int jocc; |
|---|
| 113 | LONGX hidx; |
|---|
| 114 | char *h; |
|---|
| 115 | #define TOPJD 1 |
|---|
| 116 | #if TOPJD |
|---|
| 117 | LISTJD *l; |
|---|
| 118 | int howmany=jk_tabcats[0]; //6; |
|---|
| 119 | float minweight=0.1; |
|---|
| 120 | LONGX freq; |
|---|
| 121 | float peso; |
|---|
| 122 | int docjds; |
|---|
| 123 | docjds=w2topjd (cmd, awtfp, coll, JDDOCFREQ, JDDOCPESO, howmany, minweight); |
|---|
| 124 | #else |
|---|
| 125 | LONGX *docfreqp; |
|---|
| 126 | LONGX loop; |
|---|
| 127 | int docjds=0; |
|---|
| 128 | for (docfreqp=JDDOCFREQ, loop=jdmaxprim; loop--; docfreqp++) { |
|---|
| 129 | if (*docfreqp) docjds++; |
|---|
| 130 | } |
|---|
| 131 | #endif //TOPJD |
|---|
| 132 | if (replyhx) { |
|---|
| 133 | sprintf(reply,"<jdlist doc=\"%d\" jds=\"%d\">\n",xocc,docjds); |
|---|
| 134 | reply+=strlen(reply); |
|---|
| 135 | } |
|---|
| 136 | |
|---|
| 137 | awtdatabuff[0]='\0'; |
|---|
| 138 | #if TOPJD |
|---|
| 139 | for (l=listjdhdr, jocc=0; l; l=l->next) { |
|---|
| 140 | if (!l) break; |
|---|
| 141 | hidx=l->hidx; h=jdtable+(l->hidx*jdhwidth); |
|---|
| 142 | freq=l->freq; //JDDOCPESO[hidx]; |
|---|
| 143 | peso=l->weight; |
|---|
| 144 | #else |
|---|
| 145 | for (docfreqp=JDDOCFREQ, h=jdtable, hidx=0, jocc=0; jocc < docjds; hidx++, h+=jdhwidth, docfreqp++) { |
|---|
| 146 | if (!*docfreqp) continue; |
|---|
| 147 | freq= *docfreqp; |
|---|
| 148 | peso= JDDOCPESO[hidx]; |
|---|
| 149 | #endif //TOPJD |
|---|
| 150 | jocc++; |
|---|
| 151 | |
|---|
| 152 | //reply AuTi subset |
|---|
| 153 | if (replyhx) { |
|---|
| 154 | //sprintf(reply,"<jd doc=\"%d\" rank=\"%d\" f=\"%"_LD_"\" w=\"%f\" h=\"%"_LD_"\">%s</jd>\n",xocc,jocc,freq,peso/* /freq */,hidx,h); |
|---|
| 155 | //reply+=strlen(reply); |
|---|
| 156 | char *p; |
|---|
| 157 | sprintf(reply,"<jd doc=\"%d\" rank=\"%d\" f=\"%"_LD_"\" w=\"%f\">",xocc,jocc,freq,peso); |
|---|
| 158 | reply+=strlen(reply); |
|---|
| 159 | for (p=h; *p; p++) { |
|---|
| 160 | if (convent[*p]) { strcpy(reply,convent[*p]); reply+=strlen(reply); } |
|---|
| 161 | else *reply++ = *p; |
|---|
| 162 | } |
|---|
| 163 | sprintf(reply,"</jd>\n"); |
|---|
| 164 | reply+=strlen(reply); |
|---|
| 165 | |
|---|
| 166 | } |
|---|
| 167 | |
|---|
| 168 | if (addeight) { |
|---|
| 169 | sprintf(batchp,"<8 0>%d^r%d^r%d^f%"_LD_"^w%f^h%"_LD_"^d%s</8>",docjds,xocc,jocc,freq,peso/* /freq */,hidx,h); |
|---|
| 170 | batchp+=strlen(batchp); |
|---|
| 171 | } |
|---|
| 172 | } |
|---|
| 173 | if (awtdatabuff[0]) { |
|---|
| 174 | if (fldupdat(outirec,awtdatabuff)) fatal("wtrig2/similardb/collapse/fldupdat"); |
|---|
| 175 | if (*docydb) recupdat(outcrec,outirec); |
|---|
| 176 | } |
|---|
| 177 | |
|---|
| 178 | if (replyhx) { |
|---|
| 179 | sprintf(reply,"</jdlist>\n"); |
|---|
| 180 | reply+=strlen(reply); |
|---|
| 181 | } |
|---|
| 182 | |
|---|
| 183 | } // end if jdjoin |
|---|
| 184 | |
|---|
| 185 | //no text1=.. |
|---|
| 186 | if (!txt1vp) break; |
|---|
| 187 | |
|---|
| 188 | } // end for each line of text= |
|---|
| 189 | |
|---|
| 190 | //doctrailer |
|---|
| 191 | //sprintf(reply,"</docresponse>\n"); reply+=strlen(reply); |
|---|
| 192 | |
|---|
| 193 | |
|---|
| 194 | /* show coljdlist |
|---|
| 195 | */ |
|---|
| 196 | if (jk_n && jk_tabwidth[0]/*joinjd*/) if (coll) { |
|---|
| 197 | |
|---|
| 198 | char *batchp=awtdatabuff; |
|---|
| 199 | int jocc; |
|---|
| 200 | LONGX hidx; |
|---|
| 201 | char *h; |
|---|
| 202 | #if TOPJD |
|---|
| 203 | LISTJD *l; |
|---|
| 204 | int howmany=jk_tabcats[0]; //6; |
|---|
| 205 | float minweight=0.1; |
|---|
| 206 | LONGX freq; |
|---|
| 207 | float peso; |
|---|
| 208 | int coljds; |
|---|
| 209 | coljds=w2topjd (cmd, awtfp, coll, JDCOLFREQ, JDCOLPESO, howmany, minweight); |
|---|
| 210 | #else |
|---|
| 211 | LONGX *colfreqp; |
|---|
| 212 | LONGX loop; |
|---|
| 213 | int coljds=0; |
|---|
| 214 | for (colfreqp=JDCOLFREQ, loop=jdmaxprim; loop--; colfreqp++) { |
|---|
| 215 | if (*colfreqp) coljds++; |
|---|
| 216 | } |
|---|
| 217 | #endif //TOPJD |
|---|
| 218 | if (replyhx) { |
|---|
| 219 | sprintf(reply,"<jdlist doc=\"all\" jds=\"%d\">\n",coljds); |
|---|
| 220 | reply+=strlen(reply); |
|---|
| 221 | } |
|---|
| 222 | |
|---|
| 223 | awtdatabuff[0]='\0'; |
|---|
| 224 | #if TOPJD |
|---|
| 225 | for (l=listjdhdr, jocc=0; l; l=l->next) { |
|---|
| 226 | if (!l) break; |
|---|
| 227 | hidx=l->hidx; h=jdtable+(l->hidx*jdhwidth); |
|---|
| 228 | freq=l->freq; //JDCOLPESO[hidx]; |
|---|
| 229 | peso=l->weight; |
|---|
| 230 | #else |
|---|
| 231 | for (colfreqp=JDCOLFREQ, h=jdtable, hidx=0, jocc=0; jocc < coljds; hidx++, h+=jdhwidth, colfreqp++) { |
|---|
| 232 | if (!*colfreqp) continue; |
|---|
| 233 | freq= *colfreqp; |
|---|
| 234 | peso= JDCOLPESO[hidx]; |
|---|
| 235 | #endif //TOPJD |
|---|
| 236 | jocc++; |
|---|
| 237 | |
|---|
| 238 | //reply AuTi subset |
|---|
| 239 | if (replyhx) { |
|---|
| 240 | //sprintf(reply,"<jd col=\"%d\" rank=\"%d\" f=\"%"_LD_"\" w=\"%f\" h=\"%"_LD_"\">%s</jd>\n",xocc,jocc,freq,peso/* /freq */,hidx,h); |
|---|
| 241 | //reply+=strlen(reply); |
|---|
| 242 | char *p; |
|---|
| 243 | sprintf(reply,"<jd doc=\"all\" rank=\"%d\" f=\"%"_LD_"\" w=\"%f\">",jocc,freq,peso); |
|---|
| 244 | reply+=strlen(reply); |
|---|
| 245 | for (p=h; *p; p++) { |
|---|
| 246 | if (convent[*p]) { strcpy(reply,convent[*p]); reply+=strlen(reply); } |
|---|
| 247 | else *reply++ = *p; |
|---|
| 248 | } |
|---|
| 249 | sprintf(reply,"</jd>\n"); |
|---|
| 250 | reply+=strlen(reply); |
|---|
| 251 | |
|---|
| 252 | } |
|---|
| 253 | |
|---|
| 254 | if (addeight) { |
|---|
| 255 | sprintf(batchp,"<9 0>%d^r%d^f%"_LD_"^w%f^h%"_LD_"^d%s</9>",coljds,jocc,freq,peso/* /freq */,hidx,h); |
|---|
| 256 | batchp+=strlen(batchp); |
|---|
| 257 | } |
|---|
| 258 | } |
|---|
| 259 | if (awtdatabuff[0]) { |
|---|
| 260 | if (fldupdat(outirec,awtdatabuff)) fatal("wtrig2/similardb/collapse/fldupdat"); |
|---|
| 261 | if (*docydb) recupdat(outcrec,outirec); |
|---|
| 262 | } |
|---|
| 263 | |
|---|
| 264 | if (replyhx) { |
|---|
| 265 | sprintf(reply,"</jdlist>\n"); |
|---|
| 266 | reply+=strlen(reply); |
|---|
| 267 | } |
|---|
| 268 | |
|---|
| 269 | } // end if jdjoin |
|---|
| 270 | |
|---|
| 271 | //trailer |
|---|
| 272 | sprintf(reply,"</request>\n"); |
|---|
| 273 | reply+=strlen(reply); |
|---|
| 274 | |
|---|
| 275 | } // end if text1= and/or text= |
|---|