/*
 * quote.cgi, version 1.0 (beta).
 *
 * The program fetches a web page and randomly quotes a part of it, as
 * specified in the file named by CONFIGFILE below.
 *
 * The geturl() function by km@multi.fi (Kimmo Makkonen) is used instead of
 * lynx. Temporary files are not used anymore, and more configuration options
 * are provided.
 *
 * hingo@multi.fi (Henrik Ingo)
 */

/*kimmos*/
#include <sys/types.h>
#include <sys/socket.h>
#include <netinet/in.h>
#include <netdb.h>
#include <unistd.h>
#include <errno.h>
/*/kimmos*/

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <ctype.h>
#include <time.h>
#include <limits.h>

#define TITLE "Quote.cgi 1.0 (beta) by hingo@multi.fi, august 1997."

/*the configurationfile:*/
#define CONFIGFILE "quote1rc"

/*variables used in the configfile*/
#define URL "URL="
#define LINKS "LINKS="
#define BEGIN "BEGIN="
#define END "END="
#define NQUOTES "NQUOTES="
#define HEADER "HEADER="
#define FROM "FROM="

/*errors*/
#define BUGREPORTTO "hingo@multi.fi"
#define CONFERR 1
#define URLERR 2
#define HTMLERR 3
#define MEMERR 4

#define TRUE 1
#define FALSE 0

/*sizes of the buffers main() hands to getconf()*/
#define URLSIZE 257
#define PATTERNSIZE 32
#define TEXTSIZE 1024

/*kimmos*/
struct urlinfo {
    char prot[10];
    char host[64];
    int port;
    char path[256];
};
/*/kimmos*/

/* Positions recorded by the latest find_begins() call; owned by find_begins(). */
int * begin_points=NULL;

void fatal(int errtype, char * string);

/*kimmos*/
/*
 * parseurl() splits url ("prot://host[:port][/path]") into *urldata.
 *
 * Every component is truncated to the size of its field instead of
 * overflowing it. A missing or out-of-range port becomes 80. A '/' is
 * appended to the path when it does not already end in one (so an empty
 * path becomes "/").
 * NOTE(review): the trailing '/' is also added after file names such as
 * "/page.html"; this is kept from the original code - confirm it is wanted.
 */
void parseurl(char *url, struct urlinfo *urldata)
{
    size_t i = 0, j, k;
    char port[10];

    urldata->port = 0;

    /* get the protocol */
    for (j = 0; url[i] != '\0' && url[i] != ':'; i++) {
        if (j < sizeof urldata->prot - 1)
            urldata->prot[j++] = url[i];
    }
    urldata->prot[j] = '\0';

    /* step over "://" without running past the end of a short string */
    for (k = 0; k < 3 && url[i] != '\0'; k++)
        i++;

    /* get the host name or ip */
    for (j = 0; url[i] != '\0' && url[i] != ':' && url[i] != '/'; i++) {
        if (j < sizeof urldata->host - 1)
            urldata->host[j++] = url[i];
    }
    urldata->host[j] = '\0';

    /* get the optional port number */
    if (url[i] == ':') {
        i++;
        for (j = 0; isdigit((unsigned char) url[i]); i++) {
            if (j < sizeof port - 1)
                port[j++] = url[i];
        }
        port[j] = '\0';
        urldata->port = atoi(port);
    }
    if (urldata->port <= 0 || urldata->port > 65535)
        urldata->port = 80;

    /* get the path, leaving room for the trailing '/' and the terminator */
    for (j = 0; url[i] != '\0'; i++) {
        if (j < sizeof urldata->path - 2)
            urldata->path[j++] = url[i];
    }
    if (j == 0 || urldata->path[j - 1] != '/')
        urldata->path[j++] = '/';
    urldata->path[j] = '\0';
}

/*
 * geturl() fetches url with a bare "GET path" request and returns the
 * response as a NUL-terminated string.
 *
 * The returned buffer is owned by geturl(): it is freed on the next call, so
 * the caller must not free it and must copy anything needed beyond that.
 * Failures do not return; they end the program through exit() or fatal().
 */
char *geturl(char *url)
{
    static char *content=NULL;
    struct urlinfo urldata;
    struct hostent *hostp;
    struct sockaddr_in sock;
    char buffer[1024];
    char request[512];
    size_t size = 0, memsize = 8192, sent;
    const size_t blocksize = 4096;
    ssize_t n;
    int sockid, reqlen;

    free(content);
    content = malloc(memsize);
    if (content == NULL)
        fatal(MEMERR, "malloc()");
    content[0] = '\0';

    parseurl(url, &urldata);

    /* create socket */
    if ((sockid = socket(AF_INET, SOCK_STREAM, 0)) < 0) {
        printf("error creating socket, error: %s\n", strerror(errno));
        exit(0);
    }

    /* look up the server address; an unknown host must not be dereferenced */
    hostp = gethostbyname(urldata.host);
    if (hostp == NULL || hostp->h_addrtype != AF_INET
        || hostp->h_addr_list[0] == NULL
        || hostp->h_length <= 0
        || (size_t) hostp->h_length > sizeof sock.sin_addr) {
        close(sockid);
        fatal(URLERR, url);
    }

    memset(&sock, 0, sizeof sock);
    sock.sin_family = AF_INET;
    memcpy(&sock.sin_addr, hostp->h_addr_list[0], (size_t) hostp->h_length);
    sock.sin_port = htons((unsigned short) urldata.port);

    if (connect(sockid, (struct sockaddr *) &sock, sizeof(sock)) < 0) {
        printf("error connecting to server, error: %s\n", strerror(errno));
        close(sockid);
        exit(0);
    }

    /* make and send request to the server */
    reqlen = snprintf(request, sizeof request, "GET %s\n", urldata.path);
    if (reqlen < 0 || (size_t) reqlen >= sizeof request) {
        close(sockid);
        fatal(URLERR, url);
    }
    for (sent = 0; sent < (size_t) reqlen; sent += (size_t) n) {
        n = write(sockid, request + sent, (size_t) reqlen - sent);
        if (n <= 0) {
            close(sockid);
            fatal(URLERR, url);
        }
    }

    /* read response from server, growing the buffer before it fills up */
    while ((n = read(sockid, buffer, sizeof(buffer))) > 0) {
        if (size + (size_t) n + 1 > memsize) {
            char *bigger;

            memsize = size + (size_t) n + 1 + blocksize;
            bigger = realloc(content, memsize);
            if (bigger == NULL) {
                close(sockid);
                fatal(MEMERR, "realloc()");
            }
            content = bigger;
        }
        memcpy(content + size, buffer, (size_t) n);
        size += (size_t) n;
        content[size] = '\0';
    }

    close(sockid);

    return content;
}
/*/kimmos*/

/*
 * fatal() prints an HTML error message for errtype (CONFERR, URLERR, HTMLERR
 * or MEMERR) and ends the program with errtype as exit status. string names
 * the page or function concerned (unused for CONFERR). Never returns.
 * NOTE(review): the HTML tags in these messages are a reconstruction -
 * confirm against the intended page layout.
 */
void fatal(int errtype, char * string)
{
    printf("<h1>Quote.cgi error!</h1>\n");

    if(errtype==CONFERR) {
        printf("<p>An error occured while trying to read the ");
        printf("configuration file.<br>\n");
        printf("The file '%s'", CONFIGFILE);
        printf(" may be of wrong format or missing.</p>\n");
    }
    else if(errtype==URLERR) {
        printf("<p>An error occured while trying ");
        printf("to get the page %s.</p>\n", string);
    }
    else if(errtype==HTMLERR) {
        printf("<p>An error occured while processing the HTML-code from ");
        printf("<a href=\"%s\">%s</a>.<br>\n", string, string);
        printf("Probably the page and the configurationfiles don't match.");
    }
    else if(errtype==MEMERR) {
        printf("<p>Function %s failed to allocate memory.\n", string);
        printf("Unable to continue.\n");
    }

    printf("<hr>\n<p>Bugreports to: ");
    printf("%s", BUGREPORTTO);
    printf("</p>\n");

    exit(errtype);
}

/*
 * skip() scrolls a file past comment lines (beginning with a #), empty lines
 * and lines beginning with a whitespace.
 * Returns the first character of the next remaining line, which is pushed
 * back so that the following read still sees it, or EOF if the end of the
 * file is reached.
 */
int skip(FILE * fp)
{
    /* int, not char: EOF must stay distinguishable from every real byte */
    int ch = fgetc(fp);

    while(ch==' ' || ch=='\n' || ch=='\r' || ch=='\t' || ch == '#') {
        /* drop the rest of this line; an empty line has nothing left to
           drop, so the line after it is not swallowed */
        while(ch != '\n' && ch != EOF)
            ch = fgetc(fp);
        if(ch == EOF)
            break;
        ch = fgetc(fp);
    }

    /*the last character taken was not a # so we'd better return it*/
    if(ch != EOF)
        ungetc(ch, fp);

    return(ch);
}

/*
 * clearend() removes every trailing character of str that is not a
 * printable, non-space character (newlines, blanks, ...). An empty string
 * is left untouched.
 */
void clearend(char *str)
{
    size_t len = strlen(str);

    while(len > 0 && !isgraph((unsigned char) str[len-1]))
        str[--len] = '\0';
}

/*
 * match_char() compares two characters case insensitively; the two quote
 * characters ' and " are also considered equal. Returns TRUE or FALSE.
 */
int match_char(char a, char b)
{
    if (tolower((unsigned char) a) == tolower((unsigned char) b))
        return(TRUE);
    if ((a == '"' && b == '\'') || (a == '\'' && b == '"'))
        return(TRUE);
    return(FALSE);
}

/*
 * match() returns TRUE if pattern matches at the current position *i in
 * string; *i is then set to the position after the matching pattern.
 * Otherwise FALSE is returned and *i is left unchanged, so that the caller
 * can retry from the very next character. An empty pattern never matches.
 */
int match(char * string, int * i, char * pattern)
{
    int k;

    if(pattern[0] == '\0')
        return(FALSE);

    for(k = 0; pattern[k] != '\0'; k++) {
        if(string[*i + k] == '\0')
            return(FALSE);
        if(!match_char(string[*i + k], pattern[k]))
            return(FALSE);
    }

    *i += k;
    return(TRUE);
}

/*
 * find_begins() records in the global begin_points the position just after
 * every occurrence of pattern in string and returns the number of
 * occurrences. The array of the previous call is released here; callers
 * must not free begin_points themselves. begin_points is never NULL
 * afterwards, but only its first "return value" entries are meaningful.
 */
int find_begins(char * string, char * pattern)
{
    int found = 0, i = 0, capacity = 1;

    free(begin_points);
    begin_points = malloc((size_t) capacity * sizeof *begin_points);
    if(NULL == begin_points)
        fatal(MEMERR, "malloc()");

    while(string[i] != '\0') {
        if(match( string, &i, pattern) == TRUE) {
            if(found == capacity) {
                int * grown;

                capacity *= 2;
                grown = realloc(begin_points, (size_t) capacity * sizeof *grown);
                if(NULL == grown)
                    fatal(MEMERR, "realloc()");
                begin_points = grown;
            }
            begin_points[found] = i;
            found++;
        }
        else
            i++;
    }

    return(found);
}

/*
 * find_end() returns the number of characters from the beginning of string
 * to the beginning of the nquotes'th occurrence of pattern. If there are
 * fewer occurrences, the length of string is returned.
 */
int find_end(char * string, char * pattern, int nquotes)
{
    int i = 0;
    int patlen = (int) strlen(pattern);

    while(string[i] != '\0') {
        if(match( string, &i, pattern) == TRUE) {
            nquotes--;
            if(nquotes == 0)
                return(i - patlen);
        }
        else
            i++;
    }

    return(i);
}

/* copy_value() copies a configuration value into dst (dstsize bytes);
   a value that does not fit is a configuration error. */
static void copy_value(char * dst, size_t dstsize, const char * src)
{
    if(strlen(src) >= dstsize)
        fatal(CONFERR, CONFIGFILE);
    strcpy(dst, src);
}

/* parse_count() reads a non-negative decimal number from text;
   anything else is a configuration error. */
static int parse_count(const char * text)
{
    char * rest;
    long value;

    errno = 0;
    value = strtol(text, &rest, 10);
    if(rest == text || errno == ERANGE || value < 0 || value > INT_MAX)
        fatal(CONFERR, CONFIGFILE);

    return((int) value);
}

/*
 * getconf() reads the settings from CONFIGFILE.
 *
 * The first non-comment line must be the HEADER= line; then lines are
 * scrolled until a URL= line is found; the remaining lines may set LINKS,
 * NQUOTES, BEGIN, END and FROM, up to the end of the file or a further URL=
 * line. Values keep their line terminator.
 *
 * url must be empty on entry. The buffers must hold at least URLSIZE (url),
 * PATTERNSIZE (begin, end) and TEXTSIZE (from, header) bytes. Variables not
 * mentioned in the file keep the value they had on entry. Any error ends the
 * program through fatal(CONFERR).
 */
void getconf(char * url, int * links, int * nquotes, char * begin,
             char * end, char * from, char * header)
{
    FILE * fp;
    char buffer[TEXTSIZE]={'\0'};

    fp=fopen(CONFIGFILE, "r");
    if(fp == NULL)
        fatal(CONFERR, CONFIGFILE);

    /*The first line of CONFIGFILE is the header to print*/
    if(skip(fp)==EOF
       || fgets(buffer, sizeof buffer, fp)==NULL
       || strncmp(buffer, HEADER, strlen(HEADER)) != 0)
        fatal(CONFERR, CONFIGFILE);
    copy_value(header, TEXTSIZE, &buffer[strlen(HEADER)]);

    /*Scroll until a line with URL is found*/
    while(url[0]=='\0') {
        if(fgets( buffer, sizeof buffer, fp) == NULL)
            fatal(CONFERR, CONFIGFILE);

        if(!strncmp(buffer, URL, strlen(URL)))
            copy_value(url, URLSIZE, &buffer[strlen(URL)]);
    }

    /*Then read the other variables. Each line is handled before looking
      ahead, so the last line of the file counts too.*/
    while(skip(fp) != EOF && fgets(buffer, sizeof buffer, fp) != NULL) {
        if(!strncmp( buffer, URL, strlen(URL)))
            break;
        else if(!strncmp( buffer, LINKS, strlen(LINKS)))
            *links = parse_count(&buffer[strlen(LINKS)]);
        else if(!strncmp( buffer, NQUOTES, strlen(NQUOTES)))
            *nquotes = parse_count(&buffer[strlen(NQUOTES)]);
        else if(!strncmp( buffer, BEGIN, strlen(BEGIN)))
            copy_value(begin, PATTERNSIZE, &buffer[strlen(BEGIN)]);
        else if(!strncmp( buffer, END, strlen(END)))
            copy_value(end, PATTERNSIZE, &buffer[strlen(END)]);
        else if(!strncmp( buffer, FROM, strlen(FROM)))
            copy_value(from, TEXTSIZE, &buffer[strlen(FROM)]);
        else
            fatal(CONFERR, CONFIGFILE);
    }

    fclose(fp);
}

/* random_index() returns a random number in the range 0 .. count-1. */
static int random_index(int count)
{
    return((int) ((double) count * (double) rand() / ((double) RAND_MAX + 1.0)));
}

/*
 * main() reads the configuration, fetches the configured page, follows
 * LINKS random links from it, and prints an HTML page quoting the text
 * between a randomly chosen BEGIN pattern and the NQUOTES'th END pattern
 * after it.
 * NOTE(review): the default BEGIN/END patterns, the link-following loop and
 * the HTML tags of the output are a reconstruction - confirm against the
 * intended behaviour.
 */
int main(void)
{
    char url[URLSIZE]={'\0'}, begin[PATTERNSIZE]="<p>", end[PATTERNSIZE]="</p>";
    char from[TEXTSIZE]={'\0'}, header[TEXTSIZE]={'\0'}, buffer[TEXTSIZE]={'\0'};
    char * content;
    int links=0, nquotes=1, found, n, m;

    srand((unsigned) time(NULL));

    printf("Content-type: text/html\n\n");

    /*Getting the configurations*/
    getconf(url, &links, &nquotes, begin, end, from, header);
    clearend(url);
    clearend(begin);
    clearend(end);

    /*Get the first page*/
    content = geturl(url);

    /*and follow links to the page to quote from*/
    while(links > 0) {
        /*count the urls given in the document*/
        found = find_begins(content, "<a href=\"");
        if( found == 0 )
            fatal(HTMLERR, url);

        /*pick one of them; the address ends at the closing quote.
          Relative addresses are not resolved.*/
        n = begin_points[random_index(found)];
        m = find_end(content+n, "\"", 1);
        if(m <= 0 || m >= (int) sizeof url)
            fatal(HTMLERR, url);

        /*copy the address before geturl() releases the old content*/
        memcpy(url, content+n, (size_t) m);
        url[m] = '\0';

        content = geturl(url);
        links--;
    }

    /*Get the <title> so we can tell what we've quoted*/
    if(find_begins(content, "<title>") > 0) {
        m = find_end(content+begin_points[0], "</title>", 1);
        if(m >= (int) sizeof buffer)
            m = (int) sizeof buffer - 1;
        memcpy(buffer, content+begin_points[0], (size_t) m);
        buffer[m] = '\0';
    }

    /*quote from content*/
    found = find_begins(content, begin);
    if( found == 0 )
        fatal(HTMLERR, url);

    /*n is where the quote starts, m how many characters it has*/
    n = begin_points[random_index(found)];

    /*look for the endpoint*/
    m = find_end( content+n, end, nquotes);

    /*lets go for the output!*/
    printf("<html>\n<head>\n<title>%s</title>\n", TITLE);
    printf("<base href=\"%s\">\n", url);
    printf("</head>\n<body>\n");

    printf("<h1>%s</h1>\n<blockquote>%s", header, begin);
    fwrite(content+n, 1, (size_t) m, stdout);
    printf("%s\n</blockquote>\n",end);

    printf("<p>\nFrom: %s, '%s'<br>\n", from, buffer);
    printf("<a href=\"%s\">%s</a>\n", url, url);
    printf("</body></html>\n");

    return(0);
}