2016-07-11 12:23:59 +00:00
|
|
|
/* jwz-style threading
|
|
|
|
* clean-room implementation of https://www.jwz.org/doc/threading.html
|
|
|
|
* without looking at any code
|
|
|
|
*
|
|
|
|
* subject threading is not done yet (siblings are sorted by date)
|
|
|
|
*/
|
|
|
|
|
|
|
|
#include <sys/stat.h>
|
2016-07-18 15:06:41 +00:00
|
|
|
#include <sys/types.h>
|
2016-07-11 12:23:59 +00:00
|
|
|
|
2016-07-18 15:06:41 +00:00
|
|
|
#include <errno.h>
|
|
|
|
#include <fcntl.h>
|
2016-07-11 12:23:59 +00:00
|
|
|
#include <search.h>
|
2016-07-18 15:06:41 +00:00
|
|
|
#include <stdint.h>
|
|
|
|
#include <stdio.h>
|
|
|
|
#include <stdlib.h>
|
|
|
|
#include <string.h>
|
2016-07-19 15:31:49 +00:00
|
|
|
#include <time.h>
|
2016-07-18 15:06:41 +00:00
|
|
|
#include <unistd.h>
|
2016-07-11 12:23:59 +00:00
|
|
|
|
|
|
|
#include "blaze822.h"
|
|
|
|
|
2016-07-20 16:47:40 +00:00
|
|
|
/* set to 1 by the -v option; when set, main() skips prune_tree() */
static int vflag;
|
2017-06-30 11:28:05 +00:00
|
|
|
/*
 * value store_id() copies into each container it fills in; main() keeps
 * it at 1 while options (-S) are processed and resets it to 0 afterwards
 */
static int optional;
|
2016-07-20 16:47:40 +00:00
|
|
|
|
2016-07-11 12:23:59 +00:00
|
|
|
/* One node of the thread forest; looked up by Message-ID via midcont(). */
struct container {
	char *mid;			/* Message-ID string, key for midorder() */
	char *file;			/* file name set by store_id(), 0 if the message was only referenced */
	struct message *msg;		/* parsed message set by store_id(), else 0 */
	time_t date;			/* Date header as parsed by thread(), -1 if unknown */
	struct container *parent;	/* container this one replies to, 0 for roots */
	struct container *child;	/* first child */
	struct container *next;		/* next sibling */
	int optional;			/* value of the global `optional` when stored */
};
|
|
|
|
|
|
|
|
/* root of the tsearch(3) tree holding all containers, ordered by midorder() */
static void *mids;
|
|
|
|
|
|
|
|
int
|
|
|
|
midorder(const void *a, const void *b)
|
|
|
|
{
|
2016-07-19 15:31:49 +00:00
|
|
|
struct container *ia = (struct container *)a;
|
|
|
|
struct container *ib = (struct container *)b;
|
2016-07-11 12:23:59 +00:00
|
|
|
|
|
|
|
return strcmp(ia->mid, ib->mid);
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
 * Return a freshly allocated Message-ID for msg.
 *
 * With a Message-ID header present:
 *   - no '<' in it:        a copy of the whole header value
 *   - '<' but no '>':      a copy starting at the '<'
 *   - otherwise:           the text between the first '<' and the
 *                          following '>'
 * Without the header, a synthetic "threadNNNNNNNN@localhost" id is
 * invented from a running counter so the message can still be tracked.
 */
char *
mid(struct message *msg)
{
	static long counter;
	char *hdr = blaze822_hdr(msg, "message-id");
	// XXX intern mid?

	if (!hdr) {
		// invent new message-id for internal tracking
		char synth[32];

		counter++;
		snprintf(synth, sizeof synth, "thread%08ld@localhost", counter);
		return strdup(synth);
	}

	char *lt = strchr(hdr, '<');
	if (!lt)
		return strdup(hdr);

	char *gt = strchr(lt, '>');
	if (!gt)
		return strdup(lt);

	return strndup(lt + 1, gt - lt - 1);
}
|
|
|
|
|
|
|
|
struct container *
|
|
|
|
midcont(char *mid)
|
|
|
|
{
|
|
|
|
struct container key, **result;
|
|
|
|
key.mid = mid;
|
|
|
|
|
|
|
|
if (!(result = tfind(&key, &mids, midorder))) {
|
|
|
|
struct container *c = malloc(sizeof (struct container));
|
|
|
|
c->mid = mid;
|
|
|
|
c->file = 0;
|
|
|
|
c->msg = 0;
|
2016-07-19 15:31:49 +00:00
|
|
|
c->date = -1;
|
2017-06-30 11:28:05 +00:00
|
|
|
c->optional = 0;
|
2016-07-11 12:23:59 +00:00
|
|
|
c->parent = c->child = c->next = 0;
|
|
|
|
return *(struct container **)tsearch(c, &mids, midorder);
|
|
|
|
} else {
|
|
|
|
return *result;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
struct container *
|
|
|
|
store_id(char *file, struct message *msg)
|
|
|
|
{
|
|
|
|
struct container *c;
|
|
|
|
|
|
|
|
c = midcont(mid(msg));
|
2016-07-11 14:12:12 +00:00
|
|
|
c->file = strdup(file);
|
2016-07-11 12:23:59 +00:00
|
|
|
c->msg = msg;
|
2017-06-30 11:28:05 +00:00
|
|
|
c->optional = optional;
|
2016-07-11 12:23:59 +00:00
|
|
|
|
|
|
|
return c;
|
|
|
|
}
|
|
|
|
|
2016-07-11 14:12:12 +00:00
|
|
|
int
|
|
|
|
reachable(struct container *child, struct container *parent)
|
|
|
|
{
|
2016-07-11 14:52:04 +00:00
|
|
|
int r = 0;
|
|
|
|
|
2016-07-11 14:12:12 +00:00
|
|
|
if (strcmp(child->mid, parent->mid) == 0)
|
|
|
|
return 1;
|
2016-07-11 14:52:04 +00:00
|
|
|
if (child->child)
|
|
|
|
r |= reachable(child->child, parent);
|
|
|
|
if (child->next)
|
|
|
|
r |= reachable(child->next, parent);
|
|
|
|
return r;
|
2016-07-11 14:12:12 +00:00
|
|
|
}
|
|
|
|
|
2016-07-11 12:23:59 +00:00
|
|
|
void
|
|
|
|
thread(char *file)
|
|
|
|
{
|
|
|
|
struct message *msg;
|
|
|
|
|
2017-06-29 14:03:13 +00:00
|
|
|
while (*file == ' ' || *file == '\t')
|
|
|
|
file++;
|
|
|
|
|
2016-07-11 12:23:59 +00:00
|
|
|
msg = blaze822(file);
|
|
|
|
if (!msg)
|
|
|
|
return;
|
|
|
|
|
|
|
|
struct container *c = store_id(file, msg);
|
|
|
|
|
|
|
|
char *mid = "";
|
|
|
|
|
2016-07-14 16:21:38 +00:00
|
|
|
char *v, *m;
|
2016-07-11 12:23:59 +00:00
|
|
|
struct container *parent = 0, *me = 0;
|
|
|
|
|
2016-07-19 15:31:49 +00:00
|
|
|
if ((v = blaze822_hdr(msg, "date"))) {
|
|
|
|
c->date = blaze822_date(v);
|
|
|
|
} else {
|
|
|
|
c->date = -1;
|
|
|
|
}
|
|
|
|
|
2016-07-11 12:23:59 +00:00
|
|
|
v = blaze822_hdr(msg, "references");
|
|
|
|
if (v) {
|
|
|
|
parent = 0;
|
|
|
|
while (1) {
|
|
|
|
m = strchr(v, '<');
|
|
|
|
if (!m)
|
|
|
|
break;
|
|
|
|
v = strchr(m, '>');
|
|
|
|
if (!v)
|
|
|
|
break;
|
|
|
|
mid = strndup(m+1, v-m-1);
|
|
|
|
// XXX free?
|
|
|
|
|
|
|
|
me = midcont(mid);
|
2016-07-11 14:12:12 +00:00
|
|
|
|
|
|
|
if (me == c)
|
|
|
|
continue;
|
|
|
|
|
|
|
|
if (parent && !me->parent &&
|
|
|
|
!reachable(me, parent) && !reachable(parent, me)) {
|
|
|
|
me->parent = parent;
|
|
|
|
me->next = parent->child;
|
2016-07-11 12:23:59 +00:00
|
|
|
parent->child = me;
|
|
|
|
}
|
|
|
|
|
|
|
|
parent = me;
|
|
|
|
}
|
|
|
|
}
|
2017-08-31 15:30:17 +00:00
|
|
|
|
2016-07-11 12:23:59 +00:00
|
|
|
v = blaze822_hdr(msg, "in-reply-to");
|
|
|
|
char *irt;
|
|
|
|
if (v) {
|
|
|
|
m = strchr(v, '<');
|
|
|
|
if (!m)
|
|
|
|
goto out;
|
|
|
|
v = strchr(m, '>');
|
|
|
|
if (!v)
|
|
|
|
goto out;
|
|
|
|
irt = strndup(m+1, v-m-1);
|
2017-08-31 15:30:17 +00:00
|
|
|
|
2016-07-11 12:23:59 +00:00
|
|
|
if (strcmp(irt, mid) != 0) {
|
|
|
|
parent = midcont(irt);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
out:
|
|
|
|
|
2016-07-11 14:12:12 +00:00
|
|
|
if (parent && parent != c) {
|
|
|
|
struct container *r;
|
|
|
|
|
2017-06-29 20:18:46 +00:00
|
|
|
// check we don't introduce a new loop
|
|
|
|
if (reachable(parent, c) || reachable(c, parent))
|
|
|
|
goto out2;
|
|
|
|
|
2016-07-11 14:12:12 +00:00
|
|
|
if (c->parent == parent) { // already correct
|
|
|
|
goto out2;
|
|
|
|
} else if (c->parent) {
|
|
|
|
// if we already have a wrong parent, orphan us first
|
|
|
|
|
2016-07-11 14:52:04 +00:00
|
|
|
if (c->parent->child == c) // first in list
|
2016-07-20 10:58:30 +00:00
|
|
|
c->parent->child = c->parent->child->next;
|
2016-07-11 14:12:12 +00:00
|
|
|
for (r = c->parent->child; r; r = r->next) {
|
|
|
|
if (r->next == c)
|
|
|
|
r->next = c->next;
|
|
|
|
}
|
2016-07-20 10:58:30 +00:00
|
|
|
|
|
|
|
c->next = 0;
|
2016-07-11 14:12:12 +00:00
|
|
|
}
|
|
|
|
|
2016-07-11 12:23:59 +00:00
|
|
|
c->parent = parent;
|
2016-07-11 14:12:12 +00:00
|
|
|
|
2016-07-13 14:13:29 +00:00
|
|
|
// add at the end
|
2016-07-20 10:58:30 +00:00
|
|
|
if (!parent->child) {
|
2016-07-13 14:13:29 +00:00
|
|
|
parent->child = c;
|
|
|
|
} else {
|
2016-07-20 10:58:30 +00:00
|
|
|
for (r = parent->child; r && r->next; r = r->next)
|
|
|
|
if (r == c)
|
|
|
|
goto out2;
|
2016-07-13 14:13:29 +00:00
|
|
|
r->next = c;
|
|
|
|
c->next = 0;
|
2016-07-11 14:12:12 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
out2:
|
|
|
|
// someone said our parent was our child, a lie
|
|
|
|
if (c->child == c->parent) {
|
|
|
|
c->child->parent = 0;
|
|
|
|
c->child = 0;
|
|
|
|
}
|
2016-07-11 12:23:59 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2016-07-19 15:31:49 +00:00
|
|
|
time_t
|
|
|
|
newest(struct container *c, int depth)
|
|
|
|
{
|
|
|
|
time_t n = -1;
|
|
|
|
|
|
|
|
if (!c)
|
|
|
|
return n;
|
|
|
|
|
|
|
|
do {
|
|
|
|
if (c->child) {
|
|
|
|
time_t r = newest(c->child, depth+1);
|
|
|
|
if (n < r)
|
|
|
|
n = r;
|
|
|
|
}
|
|
|
|
if (n < c->date)
|
|
|
|
n = c->date;
|
|
|
|
} while ((c = c->next));
|
|
|
|
|
|
|
|
return n;
|
|
|
|
}
|
|
|
|
|
2016-07-11 12:23:59 +00:00
|
|
|
/* synthetic container allocated by find_roots(); its children are the parentless containers */
struct container *top;
|
|
|
|
/* last container appended by find_root() while the list of roots is being built */
struct container *lastc;
|
|
|
|
|
|
|
|
void
|
|
|
|
find_root(const void *nodep, const VISIT which, const int depth)
|
|
|
|
{
|
2016-07-19 15:31:49 +00:00
|
|
|
(void)depth;
|
2016-07-11 12:23:59 +00:00
|
|
|
|
2016-07-19 15:31:49 +00:00
|
|
|
if (which == preorder || which == leaf) {
|
2016-07-11 12:23:59 +00:00
|
|
|
struct container *c = *(struct container **)nodep;
|
|
|
|
|
|
|
|
if (!c->parent) {
|
|
|
|
lastc->next = c;
|
|
|
|
c->next = 0;
|
2016-07-19 15:31:49 +00:00
|
|
|
time_t r = newest(c->child, 0);
|
|
|
|
if (c->date < r)
|
|
|
|
c->date = r;
|
2016-07-11 12:23:59 +00:00
|
|
|
lastc = c;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
void
|
|
|
|
find_roots()
|
|
|
|
{
|
2017-08-31 15:30:17 +00:00
|
|
|
top = malloc(sizeof (struct container));
|
2016-07-11 12:23:59 +00:00
|
|
|
top->msg = 0;
|
2016-07-19 15:31:49 +00:00
|
|
|
top->date = -1;
|
2016-07-19 12:58:16 +00:00
|
|
|
top->file = 0;
|
2016-07-11 12:23:59 +00:00
|
|
|
top->next = top->child = top->parent = 0;
|
2017-06-30 11:28:05 +00:00
|
|
|
top->optional = 0;
|
2016-07-11 12:23:59 +00:00
|
|
|
top->mid = "(top)";
|
|
|
|
|
|
|
|
lastc = top;
|
|
|
|
|
|
|
|
twalk(mids, find_root);
|
2016-07-11 14:12:12 +00:00
|
|
|
|
|
|
|
top->child = top->next;
|
|
|
|
top->next = 0;
|
2016-07-11 12:23:59 +00:00
|
|
|
}
|
|
|
|
|
2016-07-18 17:42:25 +00:00
|
|
|
void
|
|
|
|
prune_tree(struct container *c, int depth)
|
|
|
|
{
|
|
|
|
do {
|
|
|
|
if (c->child)
|
|
|
|
prune_tree(c->child, depth+1);
|
2016-07-19 14:56:09 +00:00
|
|
|
if (depth >= 0 && !c->file && c->child && !c->child->next) {
|
2016-07-18 17:42:25 +00:00
|
|
|
// turn into child if we don't exist and only have a child
|
|
|
|
c->mid = c->child->mid;
|
|
|
|
c->file = c->child->file;
|
|
|
|
c->msg = c->child->msg;
|
2016-07-19 15:31:49 +00:00
|
|
|
c->date = c->child->date;
|
2017-06-30 11:28:05 +00:00
|
|
|
c->optional = c->child->optional;
|
2016-07-18 17:42:25 +00:00
|
|
|
c->child = c->child->child;
|
|
|
|
}
|
|
|
|
} while ((c = c->next));
|
|
|
|
}
|
|
|
|
|
2017-06-30 11:28:05 +00:00
|
|
|
int
|
|
|
|
alloptional(struct container *c)
|
|
|
|
{
|
|
|
|
do {
|
|
|
|
if (!c->optional && c->file)
|
|
|
|
return 0;
|
|
|
|
if (c->child && !alloptional(c->child))
|
|
|
|
return 0;
|
|
|
|
} while ((c = c->next));
|
|
|
|
|
|
|
|
return 1;
|
|
|
|
}
|
|
|
|
|
2016-07-19 15:31:49 +00:00
|
|
|
static int
|
|
|
|
dateorder(const void *a, const void *b)
|
|
|
|
{
|
|
|
|
struct container *ia = *(struct container **)a;
|
|
|
|
struct container *ib = *(struct container **)b;
|
|
|
|
|
|
|
|
if (ia->date < ib->date)
|
|
|
|
return -1;
|
|
|
|
else if (ia->date > ib->date)
|
|
|
|
return 1;
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
void
|
|
|
|
sort_tree(struct container *c, int depth)
|
|
|
|
{
|
|
|
|
if (c && c->child) {
|
|
|
|
struct container *r;
|
|
|
|
int i, j;
|
|
|
|
for (r = c->child, i = 0; r; r = r->next, i++)
|
|
|
|
sort_tree(r, depth+1);
|
|
|
|
|
2017-06-30 11:28:05 +00:00
|
|
|
if (i == 1) // no sort needed
|
2016-07-19 15:31:49 +00:00
|
|
|
return;
|
|
|
|
|
|
|
|
struct container **a = calloc(sizeof (struct container *), i);
|
|
|
|
if (!a)
|
|
|
|
return;
|
|
|
|
|
|
|
|
for (r = c->child, i = 0; r; r = r->next, i++)
|
|
|
|
a[i] = r;
|
|
|
|
|
|
|
|
qsort(a, i, sizeof (struct container *), dateorder);
|
|
|
|
|
|
|
|
c->child = a[0];
|
|
|
|
for (j = 0; j+1 < i; j++)
|
|
|
|
a[j]->next = a[j+1];
|
|
|
|
a[i-1]->next = 0;
|
|
|
|
|
|
|
|
free(a);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2016-07-11 12:23:59 +00:00
|
|
|
void
|
|
|
|
print_tree(struct container *c, int depth)
|
|
|
|
{
|
|
|
|
do {
|
2017-06-30 11:28:05 +00:00
|
|
|
// skip toplevel threads when they are unresolved or all optional
|
|
|
|
if (depth <= 1 &&
|
|
|
|
(c->optional || !c->file) &&
|
|
|
|
(!c->child || alloptional(c->child)))
|
|
|
|
continue;
|
|
|
|
|
2016-07-11 12:23:59 +00:00
|
|
|
if (depth >= 0) {
|
|
|
|
int i;
|
|
|
|
for (i = 0; i < depth; i++)
|
|
|
|
printf(" ");
|
|
|
|
if (c->file)
|
|
|
|
printf("%s\n", c->file);
|
|
|
|
else
|
|
|
|
printf("<%s>\n", c->mid);
|
|
|
|
}
|
2017-08-31 15:30:17 +00:00
|
|
|
|
2016-07-11 12:23:59 +00:00
|
|
|
if (c->child)
|
|
|
|
print_tree(c->child, depth+1);
|
2016-07-14 16:20:42 +00:00
|
|
|
} while ((c = c->next));
|
2016-07-11 12:23:59 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
int
|
|
|
|
main(int argc, char *argv[])
|
|
|
|
{
|
2016-07-20 16:47:40 +00:00
|
|
|
int c, i;
|
|
|
|
|
2017-06-30 11:28:05 +00:00
|
|
|
optional = 1;
|
|
|
|
|
2017-08-31 15:30:17 +00:00
|
|
|
while ((c = getopt(argc, argv, "S:v")) != -1)
|
|
|
|
switch (c) {
|
|
|
|
case 'S': blaze822_loop1(optarg, thread); break;
|
|
|
|
case 'v': vflag = 1; break;
|
|
|
|
default:
|
2017-06-30 11:28:05 +00:00
|
|
|
fprintf(stderr, "Usage: mthread [-v] [-S dir] [msgs...]\n");
|
2017-08-31 15:30:17 +00:00
|
|
|
exit(1);
|
|
|
|
}
|
2016-07-20 16:47:40 +00:00
|
|
|
|
2017-06-30 11:28:05 +00:00
|
|
|
optional = 0;
|
|
|
|
|
2016-07-22 22:29:11 +00:00
|
|
|
if (argc == optind && isatty(0))
|
|
|
|
i = blaze822_loop1(":", thread);
|
|
|
|
else
|
2016-07-20 16:47:40 +00:00
|
|
|
i = blaze822_loop(argc-optind, argv+optind, thread);
|
2016-07-11 12:23:59 +00:00
|
|
|
|
|
|
|
find_roots();
|
2016-07-20 16:47:40 +00:00
|
|
|
if (!vflag)
|
|
|
|
prune_tree(top, -1);
|
2016-07-19 15:31:49 +00:00
|
|
|
sort_tree(top, -1);
|
2016-07-11 12:23:59 +00:00
|
|
|
print_tree(top, -1);
|
|
|
|
|
2016-07-18 16:51:38 +00:00
|
|
|
fprintf(stderr, "%d mails threaded\n", i);
|
|
|
|
|
2016-07-11 12:23:59 +00:00
|
|
|
return 0;
|
|
|
|
}
|