/****************************************************************************
FILE          : comment-strip.c
LAST REVISION : 2003-01-19
SUBJECT       : Program to remove comments from C++ source.
PROGRAMMER    : (C) Copyright 2003 by Peter Chapin

This program is the solution to Vermont Technical College's CIS-4050
Worksheet #1. See the state diagram in comment-strip.zargo for more
information.

Please send comments or bug reports to

     Peter Chapin
     Vermont Technical College
     Randolph Center, VT 05061
     pchapin@ecet.vtc.edu
****************************************************************************/

#include <stdio.h>

// The comment-stripping state machine uses the nine states listed below.
// The enumerators keep their original order, so their values are unchanged.
enum state_type {
  NORMAL,         // Ordinary text, outside any comment or quoted literal.
  MAYBE_COMMENT,  // A '/' was just read; it may be the start of a comment.
  SLASH_SLASH,    // Inside a slash-slash comment (runs to end of line).
  SLASH_STAR,     // Inside a slash-star comment.
  MAYBE_END,      // Inside a slash-star comment, just after a '*'.
  D_QUOTE,        // Inside a double-quoted literal.
  D_ESCAPE,       // Just after a backslash inside a double-quoted literal.
  S_QUOTE,        // Inside a single-quoted literal.
  S_ESCAPE        // Just after a backslash inside a single-quoted literal.
};

// ============
// Main Program
// ============

// Copies standard input to standard output with comments removed.
//
// Each comment is replaced by a single space. The newline that ends a
// slash-slash comment and every newline inside a slash-star comment are
// still written, so the output has the same number of lines as the input.
// The contents of double-quoted and single-quoted literals (including
// backslash escapes) are copied through unchanged, so comment-like text
// inside them is left alone.
//
// If the input ends inside a quoted literal or a slash-star comment, a
// diagnostic is written to stderr. The program returns 0 in every case.
//
// NOTE(review): a slash-slash comment is ended by the first '\n' seen; a
// backslash-newline continuation of such a comment is not recognized.
//
int main(void)
{
  enum state_type state = NORMAL;
  int ch;

  // Read the input a character at a time...
  while ((ch = getchar()) != EOF) {
    switch (state) {
      case NORMAL:
        if (ch == '/') state = MAYBE_COMMENT;
        else if (ch == '"') { putchar(ch); state = D_QUOTE; }
        else if (ch == '\'') { putchar(ch); state = S_QUOTE; }
        else putchar(ch);
        break;

      // A space on entry into a slash-slash comment is probably not
      // necessary since the '\n' at the end will serve the same need.
      // Also... I unget any character that is not a real start of
      // comment so that it can be handled later from state NORMAL.
      // The held-back '/' is written first so output order is preserved.
      //
      case MAYBE_COMMENT:
        if (ch == '/') { putchar(' '); state = SLASH_SLASH; }
        else if (ch == '*') { putchar(' '); state = SLASH_STAR; }
        else { putchar('/'); ungetc(ch, stdin); state = NORMAL; }
        break;

      // Discard everything up to the newline; the newline itself is kept.
      case SLASH_SLASH:
        if (ch == '\n') { putchar(ch); state = NORMAL; }
        break;

      // Print newline characters here to preserve line numbers in output.
      case SLASH_STAR:
        if (ch == '\n') putchar(ch);
        if (ch == '*') state = MAYBE_END;
        break;

      // Stay in MAYBE_END if a '*' is seen. Any other character that is
      // not '/' returns to SLASH_STAR; if that character is a newline it
      // must still be printed, otherwise a '*' at the end of a comment
      // line would swallow the line break and shift later line numbers.
      case MAYBE_END:
        if (ch == '/') state = NORMAL;
        else if (ch != '*') {
          if (ch == '\n') putchar(ch);
          state = SLASH_STAR;
        }
        break;

      case D_QUOTE:
        putchar(ch);
        if (ch == '"') state = NORMAL;
        else if (ch == '\\') state = D_ESCAPE;
        break;

      case S_QUOTE:
        putchar(ch);
        if (ch == '\'') state = NORMAL;
        else if (ch == '\\') state = S_ESCAPE;
        break;

      // The character after a backslash is copied without being examined,
      // so an escaped quote does not end the literal.
      case D_ESCAPE:
        putchar(ch);
        state = D_QUOTE;
        break;

      case S_ESCAPE:
        putchar(ch);
        state = S_QUOTE;
        break;
    }
  }

  // When the loop ends I should be in the state NORMAL (SLASH_SLASH
  // would be okay too). If I'm not in a sensible state, handle it.
  //
  if (state == D_QUOTE || state == D_ESCAPE) {
    fprintf(stderr, "Unclosed double quote at end-of-file.\n");
  }
  if (state == S_QUOTE || state == S_ESCAPE) {
    fprintf(stderr, "Unclosed single quote at end-of-file.\n");
  }
  if (state == MAYBE_COMMENT) {
    // If the last character in the file was a slash, it's got to be
    // some kind of error. I'm not going to worry about it, but the
    // slash that was being held back is still written out.
    putchar('/');
  }
  if (state == SLASH_STAR || state == MAYBE_END) {
    fprintf(stderr, "Unclosed slash-star comment at end-of-file.\n");
  }
  return 0;
}
