printing a binary tree in C (and other imperative languages)

(First-time poster and rather new in programming, so be patient, please!)

I'm interested in both an efficient general algorithm for printing formatted binary trees (in a CLI environment) and a C implementation. Here is some code that I wrote myself for fun (this is a much simplified version of the original and part of a larger program supporting many BST operations, but it should compile just fine):

#include <stdbool.h>    // C99, boolean type support
#include <stdio.h>
#include <stdlib.h>
#include <math.h>

#define DATATYPE_IS_DOUBLE
#define NDEBUG                      // disable assertions
#include <assert.h>

#define WCHARBUF_LINES          20  // def: 20
#define WCHARBUF_COLMS          800 // def: 80 (using a huge number, like 500, is a good idea,
                                    //          in order to prevent a buffer overflow :)
#define RECOMMENDED_CONS_WIDTH  150
#define RECOMMENDED_CONS_WIDTHQ "150"   // use the same value, quoted

/* Preprocessor directives depending on DATATYPE_IS_* : */
#if defined DATATYPE_IS_INT || defined DATATYPE_IS_LONG
    #define DTYPE           long int
    #define DTYPE_STRING    "INTEGER"
    #define DTYPE_PRINTF    "%*.*ld"
    #undef DATATYPE_IS_CHAR

#elif defined DATATYPE_IS_FLOAT
    #define DTYPE           float
    #define DTYPE_STRING    "FLOAT"
    #define DTYPE_PRINTF    "%*.*f"
    #undef DATATYPE_IS_CHAR

#elif defined DATATYPE_IS_DOUBLE
    #define DTYPE           double
    #define DTYPE_STRING    "DOUBLE"
    #define DTYPE_PRINTF    "%*.*lf"
    #undef DATATYPE_IS_CHAR

#elif defined DATATYPE_IS_CHAR
    #define DTYPE           char
    #define DTYPE_STRING    "CHARACTER"
    #define DTYPE_PRINTF    "%*.*c" /* using the "precision" sub-specifier ( .* ) with a  */
                                    /* character will produce a harmless compiler warning */
#else
    #error "DATATYPE_IS_* preprocessor directive undefined!"

#endif


typedef struct node_struct {
    DTYPE data;
    struct node_struct *left;
    struct node_struct *right;
    /* int height;  // useful for AVL trees */
} node;

typedef struct {
    node *root;
    bool IsAVL;     // useful for AVL trees
    long size;
} tree;


static inline
DTYPE get_largest(node *n){
    if (n == NULL)
        return (DTYPE)0;

    for(; n->right != NULL; n=n->right);

    return n->data;
}

static
int subtreeheight(node *ST){
    if (ST == NULL)
        return -1;

    int height_left  = subtreeheight(ST->left);
    int height_right = subtreeheight(ST->right);

    return (height_left > height_right) ? (height_left + 1) : (height_right + 1);
}


void prettyprint_tree(tree *T){
    if (T == NULL)  // if T empty, abort
        return;

#ifndef DATATYPE_IS_CHAR /* then DTYPE is a numeric type */
    /* compute spaces, find width: */
    int width, i, j;
    DTYPE max = get_largest(T->root);

    width = (max < 10) ? 1 :
            (max < 100) ? 2 :
            (max < 1000) ? 3 :
            (max < 10000) ? 4 :
            (max < 100000) ? 5 :
            (max < 1000000) ? 6 :
            (max < 10000000) ? 7 :
            (max < 100000000) ? 8 :
            (max < 1000000000) ? 9 : 10;
    assert  (max < 10000000000);

    width += 2; // needed for prettier results

#if defined DATATYPE_IS_FLOAT || defined DATATYPE_IS_DOUBLE
    width += 2; // because of the decimals! (1 decimal is printed by default...)
#endif // float or double

    int spacesafter = width / 2;
    int spacesbefore = spacesafter + 1;
    //int spacesbefore = ceil(width / 2.0);

#else /* character input */
    int i, j, width = 3, spacesbefore = 2, spacesafter = 1;

#endif // #ifndef DATATYPE_IS_CHAR

    /* start wchar_t printing, using a 2D character array with swprintf() : */

    struct columninfo{  // auxiliary structure
        bool visited;
        int  col;
    };

    wchar_t wcharbuf[WCHARBUF_LINES][WCHARBUF_COLMS];
    int line=0;
    struct columninfo eachline[WCHARBUF_LINES];

    for (i=0; i<WCHARBUF_LINES; ++i){       // initialization
        for (j=0; j<WCHARBUF_COLMS; ++j)
            wcharbuf[i][j] = (wchar_t)' ';
        eachline[i].visited = false;
        eachline[i].col = 0;
    }

    int height = subtreeheight(T->root);

    void recur_swprintf(node *ST, int cur_line, const wchar_t *nullstr){ // nested function,
                                                                            // GCC extension!
        float offset = width * pow(2, height - cur_line);

        ++cur_line;

        if (eachline[cur_line].visited == false) {
            eachline[cur_line].col = (int) (offset / 2);
            eachline[cur_line].visited = true;
        }
        else{
            eachline[cur_line].col += (int) offset;
            if (eachline[cur_line].col + width > WCHARBUF_COLMS)
                swprintf(wcharbuf[cur_line], L"  BUFFER OVERFLOW DETECTED! ");
        }

        if (ST == NULL){
            swprintf(wcharbuf[cur_line] + eachline[cur_line].col, L"%*.*s", 0, width, nullstr);
            if (cur_line <= height){
                /* use spaces instead of the nullstr for all the "children" of a NULL node */
                recur_swprintf(NULL, cur_line, L"          ");
                recur_swprintf(NULL, cur_line, L"          ");
            }
            else
                return;
        }
        else{
            recur_swprintf(ST->left,  cur_line, nullstr);
            recur_swprintf(ST->right, cur_line, nullstr);

            swprintf(wcharbuf[cur_line] + eachline[cur_line].col - 1, L"("DTYPE_PRINTF"",
                     spacesbefore, 1, ST->data);

          //swprintf(wcharbuf[cur_line] + eachline[cur_line].col + spacesafter + 1, L")");
            swprintf(wcharbuf[cur_line] + eachline[cur_line].col + spacesafter + 2, L")");
        }
    }

    void call_recur(tree *tr){  // nested function, GCC extension! (wraps recur_swprintf())
        recur_swprintf(tr->root, -1, L"NULL");
    }

    call_recur(T);

    /* Omit empty columns: */
    int omit_cols(void){        // nested function, GCC extension!
        int col;

        for (col=0; col<RECOMMENDED_CONS_WIDTH; ++col)
            for (line=0; line <= height+1; ++line)
                if (wcharbuf[line][col] != ' ' && wcharbuf[line][col] != '')
                    return col;

        return 0;
    }

    /* Use fputwc to transfer the character array to the screen: */
    j = omit_cols() - 2;
    j = (j < 0) ? 0 : j;

    for (line=0; line <= height+1; ++line){     // assumes RECOMMENDED_CONS_WIDTH console window!
        fputwc('n', stdout);                   // optional blanc line
        for (i=j; i<j+RECOMMENDED_CONS_WIDTH && i<WCHARBUF_COLMS; ++i)
            fputwc(wcharbuf[line][i], stdout);
        fputwc('n', stdout);
    }
}

(also uploaded to a pastebin service, in order to preserve syntax highlighting)

It works quite well, although the automatic width setting could be better. The preprocessor magic is a bit silly (or even ugly) and not really related to the algorithm, but it allows using various data types in the tree nodes (I saw it as a chance to experiment a bit with the preprocessor - remember, I am a newbie!).

The main program is supposed to call

system("mode con:cols="RECOMMENDED_CONS_WIDTHQ" lines=2000");

before calling prettyprint_tree(), when running inside cmd.exe .

Sample output:


                                                  (106.0)


                      (102.0)                                                 (109.0)


        (101.5)                      NULL                       (107.0)                     (115.0)


  NULL          NULL                                     (106.1)        NULL         (113.0)        NULL


                                                      NULL   NULL                 NULL   NULL

Ideally, the output would be like this (the reason I'm using the wprintf() family of functions is being able to print Unicode characters anyway):

            (107.0)
         ┌─────┴─────┐
     (106.1)        NULL
   ┌───┴───┐
  NULL   NULL

So, my questions:

  • What do you think about this code? (Coding style suggestions are also very welcome!)
  • Can it be extended in an elegant way in order to include the line-drawing characters? (Unfortunately, I don't think so.)
  • Any other algorithms in C or other imperative languages (or imperative pseudo-code)?
  • Somewhat unrelated: What's your opinion about nested functions (non-portable GNU extension)? I think it's an elegant way to write recursive parts of a function without having to provide all the local variables as arguments (and also useful as an implementation-hiding technique), but it could be my Pascal past :-) I'm interested in the opinion of more experienced coders.
  • Thank you in advance for your responses!

    PS. The question is not a duplicate of this one.


    edit: Jonathan Leffler wrote an excellent answer that will most probably become the "accepted answer" after a few days (unless someone posts something equally awesome!). I decided to respond here instead of commenting because of the space constraints.

  • The code above is actually part of a larger "homework" project (implementing BST operations in a shared library + a CLI app using that library). However, the "prettyprint" function was not part of the requirements; just something I decided to add myself.
  • I also added a "convert to AVL without rotations" function, that used "arraystr" as an intermediate representation ;-) I forgot that it wasn't used here. I've edited the code to remove it. Also, the bool IsAVL struct member is anything but unused; just not used in this particular function. I had to copy/paste code from various files and make a lot of changes in order to present the code cited above. That's a problem that I don't know how to solve. I would gladly post the whole program, but it is too large and commented in my mother-tongue (not in English!).
  • The whole project was about 1600 LOC (including comments) with multiple build targets (debug/release/static-linking) and it compiled cleanly with -Wall and -Wextra enabled. Assertions and debug messages were enabled/disabled automatically depending on the build target. Also I thought that function prototypes weren't needed for nested functions, after all nested functions do not implement any external interface by definition - GCC certainly didn't complain here. I don't know why there are so many warnings on OSX :(
  • I'm using GCC 4.4.1 on Windows 7.
  • Despite writing and testing this program on Windows, I am actually a Linux user... Still, I can't stand vim and I use nano (inside GNU screen) or gedit instead (shoot me)! In any case, I prefer the K&R brace style :)
  • Portability doesn't really matter, for Linux users GCC is pretty much de facto... The fact that it also works well under Windows is a nice bonus.
  • I'm not using a VCS, perhaps I should. I want to try, but all of them seem too complex for my needs and I don't know how to choose one :-)
  • You are definitely right about checking for depth overflow, thankfully it is very easy to add.
  • Thanks for the L' ' advice!
  • I find your suggestion (encapsulating "the whole of the drawing code so that the screen image and related information is in a single structure") extremely interesting... but I don't really understand what you mean as "encapsulation". Could you, please, provide 3 or 4 lines of (pseudo)code showing a possible function declaration and/or a possible function call?
  • This is my first "large-ish" (and non-trivial) program and I'm really thankful for your advice.


    edit #2: Here is an implementation of the "quick and dirty" method mentioned here.
    ( edit #3: I decided to split it to a separate answer, since it is a valid answer to the OP.)

    Many responses mentioned Graphviz. I already knew about it (many Linux apps are linked against it) but I thought it would be overkill for a 10KB CLI executable. However, I'll keep it in mind for the future. It seems great.


    You need to decide on whether your code needs to be portable. If you might ever need to use a compiler other than GCC, the nested functions are lethal to your portability goal. I would not use them - but my portability goals may not be the same as yours.

    Your code is missing <wchar.h> ; it compiles fairly cleanly without it - GCC complained about missing prototypes for your non-static functions and for swprintf() and fputwc() ), but adding <wchar.h> generates a lot of serious warnings related to swprintf() ; they are actually diagnosing a bug.

    gcc -O -I/Users/jleffler/inc -std=c99 -Wall -Wextra -Wmissing-prototypes 
        -Wstrict-prototypes -Wold-style-definition -c tree.c
    tree.c:88:6: warning: no previous prototype for ‘prettyprint_tree’
    tree.c: In function ‘prettyprint_tree’:
    tree.c:143:10: warning: no previous prototype for ‘recur_swprintf’
    tree.c: In function ‘recur_swprintf’:
    tree.c:156:17: warning: passing argument 2 of ‘swprintf’ makes integer from pointer without a cast
    /usr/include/wchar.h:135:5: note: expected ‘size_t’ but argument is of type ‘int *’
    tree.c:156:17: error: too few arguments to function ‘swprintf’
    /usr/include/wchar.h:135:5: note: declared here
    tree.c:160:13: warning: passing argument 2 of ‘swprintf’ makes integer from pointer without a cast
    /usr/include/wchar.h:135:5: note: expected ‘size_t’ but argument is of type ‘int *’
    tree.c:174:22: warning: passing argument 2 of ‘swprintf’ makes integer from pointer without a cast
    /usr/include/wchar.h:135:5: note: expected ‘size_t’ but argument is of type ‘int *’
    tree.c:174:22: warning: passing argument 3 of ‘swprintf’ makes pointer from integer without a cast
    /usr/include/wchar.h:135:5: note: expected ‘const wchar_t * restrict’ but argument is of type ‘int’
    tree.c:177:13: warning: passing argument 2 of ‘swprintf’ makes integer from pointer without a cast
    /usr/include/wchar.h:135:5: note: expected ‘size_t’ but argument is of type ‘int *’
    tree.c:177:13: error: too few arguments to function ‘swprintf’
    /usr/include/wchar.h:135:5: note: declared here
    tree.c: In function ‘prettyprint_tree’:
    tree.c:181:10: warning: no previous prototype for ‘call_recur’
    tree.c:188:9: warning: no previous prototype for ‘omit_cols’
    

    (This is GCC 4.5.2 on MacOS X 10.6.5.)

  • Do look up the interface to swprintf() ; it is more like snprintf() than sprintf() (which is A Good Thing™!).
  • The overall idea is interesting. I suggest choosing one representation when submitting your code for analysis, and cleaning up anything that is not relevant to the code analysis. For example, the arraystr type is defined but unused - you don't want to let people like me get cheap shots at your code. Similarly with the unused structure members; don't even leave them as comments, even if you might want to keep them in the code in your VCS (though why?). You are using a version control system (VCS), aren't you? And that's a rhetorical question - if you aren't using a VCS, start using one now, before you lose something you value.

    Design-wise, you want to avoid doing things like requiring the main program to run an obscure system() command - your code should take care of such issues (maybe with an initializer function, and perhaps a finalizer function to undo the changes made to the terminal settings).

    One more reason not to like nested functions: I can't work out how to get a declaration of the function in place. What seemed like plausible alternatives did not work - but I didn't go and read the GCC manual on them.

  • You check for column-width overflow; you do not check for depth overflow. Your code will crash and burn if you create a tree that is too deep.
  • Minor nit: you can tell people who do not use 'vi' or 'vim' to edit - they don't put the opening brace of a function in column 1. In 'vi', the opening brace in column 1 gives you an easy way to the start of a function from anywhere inside it ('[[' to jump backwards; ']]' to jump to the start of the next function).

    Don't disable assertions.

    Do include a main program and the relevant test data - it means people can test your code, instead of just compiling it.

    Use wide-character constants instead of casts:

    wcharbuf[i][j] = (wchar_t)' ';
    

    wcharbuf[i][j] = L' ';
    

    Your code creates a big screen image (20 lines x 800 columns in the code) and fills in the data to be printed. That's a reasonable way to do it. With care, you could arrange to handle the line-drawing characters. However, I think you would need to rethink the core drawing algorithms. You would probably want to encapsulate the whole of the drawing code so that the screen image and related information is in a single structure, which can be passed by reference (pointer) to functions. You'd have a set of functions to draw various bits at positions your tree-searching code designates. You would have a function to draw the data value at an appropriate position; you would have a function to draw lines at appropriate positions. You would probably not have nested functions - it is, to my eyes, far harder to read the code when there's a function nested inside another. Making functions static is good; make the nested functions into static (non-nested) functions. Give them the context they need - hence the encapsulation of the screen image.

  • Overall a good start; lots of good ideas. Lots still to do.

  • Request for information on encapsulation...

    You could use a structure such as:

    typedef struct columninfo Colinfo;
    
    typedef struct Image
    {
        wchar_t    image[WCHARBUF_LINES][WCHARBUF_COLUMNS];
        Colinfo    eachline[WCHARBUF_LINES];
    } Image;
    
    Image image;
    

    You might find it convenient and/or sensible to add some extra members; that would show up during the implementation. You might then create a function:

    void format_node(Image *image, int line, int column, DTYPE value)
    {
        ...
    }
    

    You could also make some of the constants, such as spacesafter into enum values:

    enum { spacesafter = 2 };
    

    These can then be used by any of the functions.


    Coding style : The prettyprint_tree() function juggles too much computation and data to be comfortable to read. Initialization and printing of the image buffer can for example be placed in separate functions and the width computation also. I am sure you can write a formula with log to replace the

    width = (max < 10) ? 1 :
            (max < 100) ? 2 :
            (max < 1000) ? 3 :
            ...
    

    computation.

    I am not used to reading nested functions and C, which makes it much harder for me to scan your code. Unless you don't share your code with others or have ideological reasons for tying the code to GCC, I wouldn't use those extensions.

    Algorithm : For a quick and dirty pretty-printer, written in C, I would never use your style of layout. In comparison to your algorithm, it is a no-brainer to write an in-order traversal to print

       a
      / 
     b   c
    

    as

         c
     a
         b
    

    and I don't mind having to tilt my head. For anything prettier than that I would much rather emit

    digraph g { a -> b; a -> c; }
    

    and leave it to dot to do the formatting.


    此代码应该从以下网址获得:http://www.ihas1337code.com/2010/09/how-to-pretty-print-binary-tree.html

        #include <fstream>
    #include <iostream>
    #include <deque>
    #include <iomanip>
    #include <sstream>
    #include <string>
    #include <cmath>
    using namespace std;
    
    struct BinaryTree {
      BinaryTree *left, *right;
      int data;
      BinaryTree(int val) : left(NULL), right(NULL), data(val) { }
    };
    
    // Find the maximum height of the binary tree
    int maxHeight(BinaryTree *p) {
      if (!p) return 0;
      int leftHeight = maxHeight(p->left);
      int rightHeight = maxHeight(p->right);
      return (leftHeight > rightHeight) ? leftHeight + 1: rightHeight + 1;
    }
    
    // Convert an integer value to string
    string intToString(int val) {
      ostringstream ss;
      ss << val;
      return ss.str();
    }
    
    // Print the arm branches (eg, /     ) on a line
    void printBranches(int branchLen, int nodeSpaceLen, int startLen, int nodesInThisLevel, const deque<BinaryTree*>& nodesQueue, ostream& out) {
      deque<BinaryTree*>::const_iterator iter = nodesQueue.begin();
      for (int i = 0; i < nodesInThisLevel / 2; i++) {
        out << ((i == 0) ? setw(startLen-1) : setw(nodeSpaceLen-2)) << "" << ((*iter++) ? "/" : " ");
        out << setw(2*branchLen+2) << "" << ((*iter++) ? "" : " ");
      }
      out << endl;
    }
    
    // Print the branches and node (eg, ___10___ )
    void printNodes(int branchLen, int nodeSpaceLen, int startLen, int nodesInThisLevel, const deque<BinaryTree*>& nodesQueue, ostream& out) {
      deque<BinaryTree*>::const_iterator iter = nodesQueue.begin();
      for (int i = 0; i < nodesInThisLevel; i++, iter++) {
        out << ((i == 0) ? setw(startLen) : setw(nodeSpaceLen)) << "" << ((*iter && (*iter)->left) ? setfill('_') : setfill(' '));
        out << setw(branchLen+2) << ((*iter) ? intToString((*iter)->data) : "");
        out << ((*iter && (*iter)->right) ? setfill('_') : setfill(' ')) << setw(branchLen) << "" << setfill(' ');
      }
      out << endl;
    }
    
    // Print the leaves only (just for the bottom row)
    void printLeaves(int indentSpace, int level, int nodesInThisLevel, const deque<BinaryTree*>& nodesQueue, ostream& out) {
      deque<BinaryTree*>::const_iterator iter = nodesQueue.begin();
      for (int i = 0; i < nodesInThisLevel; i++, iter++) {
        out << ((i == 0) ? setw(indentSpace+2) : setw(2*level+2)) << ((*iter) ? intToString((*iter)->data) : "");
      }
      out << endl;
    }
    
    // Pretty formatting of a binary tree to the output stream
    // @ param
    // level  Control how wide you want the tree to sparse (eg, level 1 has the minimum space between nodes, while level 2 has a larger space between nodes)
    // indentSpace  Change this to add some indent space to the left (eg, indentSpace of 0 means the lowest level of the left node will stick to the left margin)
    void printPretty(BinaryTree *root, int level, int indentSpace, ostream& out) {
      int h = maxHeight(root);
      int nodesInThisLevel = 1;
    
      int branchLen = 2*((int)pow(2.0,h)-1) - (3-level)*(int)pow(2.0,h-1);  // eq of the length of branch for each node of each level
      int nodeSpaceLen = 2 + (level+1)*(int)pow(2.0,h);  // distance between left neighbor node's right arm and right neighbor node's left arm
      int startLen = branchLen + (3-level) + indentSpace;  // starting space to the first node to print of each level (for the left most node of each level only)
    
      deque<BinaryTree*> nodesQueue;
      nodesQueue.push_back(root);
      for (int r = 1; r < h; r++) {
        printBranches(branchLen, nodeSpaceLen, startLen, nodesInThisLevel, nodesQueue, out);
        branchLen = branchLen/2 - 1;
        nodeSpaceLen = nodeSpaceLen/2 + 1;
        startLen = branchLen + (3-level) + indentSpace;
        printNodes(branchLen, nodeSpaceLen, startLen, nodesInThisLevel, nodesQueue, out);
    
        for (int i = 0; i < nodesInThisLevel; i++) {
          BinaryTree *currNode = nodesQueue.front();
          nodesQueue.pop_front();
          if (currNode) {
              nodesQueue.push_back(currNode->left);
              nodesQueue.push_back(currNode->right);
          } else {
            nodesQueue.push_back(NULL);
            nodesQueue.push_back(NULL);
          }
        }
        nodesInThisLevel *= 2;
      }
      printBranches(branchLen, nodeSpaceLen, startLen, nodesInThisLevel, nodesQueue, out);
      printLeaves(indentSpace, level, nodesInThisLevel, nodesQueue, out);
    }
    
    int main() {
      BinaryTree *root = new BinaryTree(30);
      root->left = new BinaryTree(20);
      root->right = new BinaryTree(40);
      root->left->left = new BinaryTree(10);
      root->left->right = new BinaryTree(25);
      root->right->left = new BinaryTree(35);
      root->right->right = new BinaryTree(50);
      root->left->left->left = new BinaryTree(5);
      root->left->left->right = new BinaryTree(15);
      root->left->right->right = new BinaryTree(28);
      root->right->right->left = new BinaryTree(41);
    
      cout << "Tree pretty print with level=1 and indentSpace=0nn";
      // Output to console
      printPretty(root, 1, 0, cout);
    
      cout << "nnTree pretty print with level=5 and indentSpace=3,noutput to file "tree_pretty.txt".nn";
      // Create a file and output to that file
      ofstream fout("tree_pretty.txt");
      // Now print a tree that's more spread out to the file
      printPretty(root, 5, 0, fout);
    
      return 0;
    }
    
    链接地址: http://www.djcxy.com/p/73286.html

    上一篇: 将源代码翻译成外语

    下一篇: 用C(和其他命令式语言)打印二叉树